From Generation Scotland,
Wave 1: N = 4977
Wave 2: N = 4312
Processed by Rosie, using EPIC array. M values.
Relatedness controlled for by regressing against GRM. Other regressors include batch and cell proportion.
Ever smoke, pack years, age, sex and 20 methylation PCs.
95% CI for distance to the nearest MDD risk locus:
GO.result = read.table(here::here('MR_meth_MDD/result/EWAS_MDDprs_fromKathryn/meta-pgrs-mdd/mdd_SNPCH_prs_5e_08/mdd_SNPCH_prs_5e_08.GO.txt'),header=T,stringsAsFactors=F)
KEGG.result = read.table(here::here('MR_meth_MDD/result/EWAS_MDDprs_fromKathryn/meta-pgrs-mdd/mdd_SNPCH_prs_5e_08/mdd_SNPCH_prs_5e_08.KEGG.txt'),header=T,stringsAsFactors=F)
GO.result.sig=GO.result[order(GO.result$FDR,decreasing=F),] %>%
filter(.,FDR<0.05)
KEGG.result.sig=KEGG.result[order(KEGG.result$FDR,decreasing=F),] %>%
filter(.,FDR<0.05)
# pathway analysis for non-MHC probes
pathway.dat=fig.dat.GS.pT.5e_08 %>%
mutate(is.MHC = if_else(MAPINFO>25000000&MAPINFO<35000000&CHR==6,'Yes','No')) %>%
filter(.,is.MHC == 'No')
GO.result.sig_noMHC <-
gometh(sig.cpg=pathway.dat$CpG[pathway.dat$p.adj<0.05],
all.cpg=pathway.dat$CpG, collection="GO",
array.type = "EPIC") %>%
.[order(.$P.DE,decreasing=F),]
KEGG.result.sig_noMHC <-
gometh(sig.cpg=pathway.dat$CpG[pathway.dat$p.adj<0.05],
all.cpg=pathway.dat$CpG, collection="KEGG",
array.type = "EPIC") %>%
.[order(.$FDR,decreasing=F),]
| ID | ONTOLOGY | TERM | N | DE | P.DE | FDR |
|---|---|---|---|---|---|---|
| GO:0002476 | BP | antigen processing and presentation of endogenous peptide antigen via MHC class Ib | 8 | 8 | 0 | 0 |
| GO:0002484 | BP | antigen processing and presentation of endogenous peptide antigen via MHC class I via ER pathway | 7 | 7 | 0 | 0 |
| GO:0002486 | BP | antigen processing and presentation of endogenous peptide antigen via MHC class I via ER pathway, TAP-independent | 7 | 7 | 0 | 0 |
| GO:0042611 | CC | MHC protein complex | 23 | 17 | 0 | 0 |
| GO:0071556 | CC | integral component of lumenal side of endoplasmic reticulum membrane | 26 | 15 | 0 | 0 |
| GO:0098553 | CC | lumenal side of endoplasmic reticulum membrane | 26 | 15 | 0 | 0 |
| GO:0098576 | CC | lumenal side of membrane | 31 | 15 | 0 | 0 |
| GO:0042605 | MF | peptide antigen binding | 23 | 13 | 0 | 0 |
| GO:0002478 | BP | antigen processing and presentation of exogenous peptide antigen | 164 | 22 | 0 | 0 |
| GO:0019884 | BP | antigen processing and presentation of exogenous antigen | 171 | 22 | 0 | 0 |
| GO:0048002 | BP | antigen processing and presentation of peptide antigen | 177 | 22 | 0 | 0 |
| GO:0042613 | CC | MHC class II protein complex | 14 | 10 | 0 | 0 |
| GO:0060333 | BP | interferon-gamma-mediated signaling pathway | 86 | 17 | 0 | 0 |
| GO:0019882 | BP | antigen processing and presentation | 212 | 22 | 0 | 0 |
| GO:0012507 | CC | ER to Golgi transport vesicle membrane | 58 | 14 | 0 | 0 |
| GO:0003823 | MF | antigen binding | 52 | 13 | 0 | 0 |
| GO:0002250 | BP | adaptive immune response | 393 | 26 | 0 | 0 |
| GO:0030176 | CC | integral component of endoplasmic reticulum membrane | 145 | 18 | 0 | 0 |
| GO:0002428 | BP | antigen processing and presentation of peptide antigen via MHC class Ib | 9 | 8 | 0 | 0 |
| GO:0031227 | CC | intrinsic component of endoplasmic reticulum membrane | 153 | 18 | 0 | 0 |
| ID | Description | N | DE | P.DE | FDR |
|---|---|---|---|---|---|
| path:hsa04612 | Antigen processing and presentation | 66 | 20.0 | 0e+00 | 0.0e+00 |
| path:hsa05330 | Allograft rejection | 34 | 15.0 | 0e+00 | 0.0e+00 |
| path:hsa04940 | Type I diabetes mellitus | 41 | 16.0 | 0e+00 | 0.0e+00 |
| path:hsa05332 | Graft-versus-host disease | 37 | 15.0 | 0e+00 | 0.0e+00 |
| path:hsa05320 | Autoimmune thyroid disease | 46 | 15.0 | 0e+00 | 0.0e+00 |
| path:hsa05416 | Viral myocarditis | 55 | 15.0 | 0e+00 | 0.0e+00 |
| path:hsa04145 | Phagosome | 140 | 18.0 | 0e+00 | 0.0e+00 |
| path:hsa05310 | Asthma | 27 | 9.0 | 0e+00 | 0.0e+00 |
| path:hsa05322 | Systemic lupus erythematosus | 112 | 13.5 | 0e+00 | 0.0e+00 |
| path:hsa04514 | Cell adhesion molecules | 135 | 16.0 | 0e+00 | 0.0e+00 |
| path:hsa05150 | Staphylococcus aureus infection | 82 | 11.0 | 0e+00 | 0.0e+00 |
| path:hsa05169 | Epstein-Barr virus infection | 193 | 17.0 | 0e+00 | 0.0e+00 |
| path:hsa05166 | Human T-cell leukemia virus 1 infection | 211 | 18.5 | 0e+00 | 0.0e+00 |
| path:hsa04672 | Intestinal immune network for IgA production | 43 | 9.0 | 0e+00 | 0.0e+00 |
| path:hsa05168 | Herpes simplex virus 1 infection | 465 | 20.0 | 0e+00 | 1.0e-07 |
| path:hsa05145 | Toxoplasmosis | 106 | 12.0 | 0e+00 | 2.0e-07 |
| path:hsa05321 | Inflammatory bowel disease | 61 | 9.0 | 0e+00 | 4.0e-07 |
| path:hsa05140 | Leishmaniasis | 68 | 9.0 | 0e+00 | 6.0e-07 |
| path:hsa05323 | Rheumatoid arthritis | 87 | 9.0 | 2e-07 | 3.0e-06 |
| path:hsa04640 | Hematopoietic cell lineage | 90 | 9.0 | 3e-07 | 4.6e-06 |
| ONTOLOGY | TERM | N | DE | P.DE | FDR | |
|---|---|---|---|---|---|---|
| GO:0060629 | BP | regulation of homologous chromosome segregation | 2 | 1 | 0.0016177 | 1 |
| GO:0006599 | BP | phosphagen metabolic process | 4 | 1 | 0.0019652 | 1 |
| GO:0006603 | BP | phosphocreatine metabolic process | 4 | 1 | 0.0019652 | 1 |
| GO:0042396 | BP | phosphagen biosynthetic process | 4 | 1 | 0.0019652 | 1 |
| GO:0046314 | BP | phosphocreatine biosynthetic process | 4 | 1 | 0.0019652 | 1 |
| GO:0003050 | BP | regulation of systemic arterial blood pressure by atrial natriuretic peptide | 2 | 1 | 0.0032532 | 1 |
| GO:0021691 | BP | cerebellar Purkinje cell layer maturation | 2 | 1 | 0.0032875 | 1 |
| GO:0006600 | BP | creatine metabolic process | 6 | 1 | 0.0034783 | 1 |
| GO:0021590 | BP | cerebellum maturation | 3 | 1 | 0.0040527 | 1 |
| GO:0021699 | BP | cerebellar cortex maturation | 3 | 1 | 0.0040527 | 1 |
| GO:0004111 | MF | creatine kinase activity | 5 | 1 | 0.0043818 | 1 |
| GO:0030644 | BP | cellular chloride ion homeostasis | 4 | 1 | 0.0048479 | 1 |
| GO:0016775 | MF | phosphotransferase activity, nitrogenous group as acceptor | 6 | 1 | 0.0050180 | 1 |
| GO:0042138 | BP | meiotic DNA double-strand break formation | 7 | 1 | 0.0054297 | 1 |
| GO:0035617 | BP | stress granule disassembly | 5 | 1 | 0.0054826 | 1 |
| GO:0001093 | MF | TFIIB-class transcription factor binding | 3 | 1 | 0.0054879 | 1 |
| GO:0051983 | BP | regulation of chromosome segregation | 97 | 2 | 0.0056624 | 1 |
| GO:1902412 | BP | regulation of mitotic cytokinesis | 6 | 1 | 0.0059002 | 1 |
| GO:0070369 | CC | beta-catenin-TCF7L2 complex | 3 | 1 | 0.0062280 | 1 |
| GO:0060631 | BP | regulation of meiosis I | 8 | 1 | 0.0063037 | 1 |
| Description | N | DE | P.DE | FDR | |
|---|---|---|---|---|---|
| path:hsa00010 | Glycolysis / Gluconeogenesis | 64 | 0 | 1 | 1 |
| path:hsa00020 | Citrate cycle (TCA cycle) | 28 | 0 | 1 | 1 |
| path:hsa00030 | Pentose phosphate pathway | 25 | 0 | 1 | 1 |
| path:hsa00040 | Pentose and glucuronate interconversions | 32 | 0 | 1 | 1 |
| path:hsa00051 | Fructose and mannose metabolism | 32 | 0 | 1 | 1 |
| path:hsa00052 | Galactose metabolism | 29 | 0 | 1 | 1 |
| path:hsa00053 | Ascorbate and aldarate metabolism | 27 | 0 | 1 | 1 |
| path:hsa00061 | Fatty acid biosynthesis | 15 | 0 | 1 | 1 |
| path:hsa00062 | Fatty acid elongation | 26 | 0 | 1 | 1 |
| path:hsa00071 | Fatty acid degradation | 42 | 0 | 1 | 1 |
| path:hsa00072 | Synthesis and degradation of ketone bodies | 9 | 0 | 1 | 1 |
| path:hsa00100 | Steroid biosynthesis | 18 | 0 | 1 | 1 |
| path:hsa00120 | Primary bile acid biosynthesis | 17 | 0 | 1 | 1 |
| path:hsa00130 | Ubiquinone and other terpenoid-quinone biosynthesis | 11 | 0 | 1 | 1 |
| path:hsa00140 | Steroid hormone biosynthesis | 56 | 0 | 1 | 1 |
| path:hsa00190 | Oxidative phosphorylation | 115 | 0 | 1 | 1 |
| path:hsa00220 | Arginine biosynthesis | 20 | 0 | 1 | 1 |
| path:hsa00230 | Purine metabolism | 125 | 0 | 1 | 1 |
| path:hsa00232 | Caffeine metabolism | 5 | 0 | 1 | 1 |
| path:hsa00240 | Pyrimidine metabolism | 55 | 0 | 1 | 1 |
# 1. No. of significant CpGs in the MHC region
ewas.pTwgsig = ewas.pTwgsig %>%
mutate(is.MHC = if_else(MAPINFO>25000000&MAPINFO<35000000&CHR==6,'Yes','No'))
no.MHC.sig = table(ewas.pTwgsig$is.MHC[ewas.pTwgsig$p.adj<0.05])
no.MHC.sig
##
## No Yes
## 37 688
# 2. Top 10 CpGs
top.10.sig = ewas.pTwgsig %>%
filter(.,p.adj<0.05) %>%
.[order(.$P.value,decreasing = F),]
top.10.sig[1:10,] %>%
kbl() %>%
kable_material(c("striped", "hover"))
| CpG | Allele1 | Allele2 | Effect | StdErr | P.value | Direction | HetISq | HetChiSq | HetDf | HetPVal | CHR | MAPINFO | p.adj | distance_to_nearest_gwashit | EWAS.sig | is.MHC | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 84 | cg03270340 | NA | NA | 55.5951 | 2.4688 | 0 | ++ | 97.6 | 41.064 | 1 | 0 | 6 | 28891204 | 0 | 3551 | yes | Yes |
| 29 | cg00903577 | NA | NA | 63.8098 | 2.8351 | 0 | ++ | 99.1 | 106.379 | 1 | 0 | 6 | 28831109 | 0 | 3099 | yes | Yes |
| 354 | cg12914966 | NA | NA | 79.4726 | 3.6703 | 0 | ++ | 99.4 | 171.532 | 1 | 0 | 6 | 28830789 | 0 | 2779 | yes | Yes |
| 470 | cg16677399 | NA | NA | 51.7185 | 2.4974 | 0 | ++ | 99.5 | 212.566 | 1 | 0 | 6 | 28830902 | 0 | 2892 | yes | Yes |
| 389 | cg14345882 | NA | NA | -51.2371 | 2.5431 | 0 | – | 99.3 | 148.961 | 1 | 0 | 6 | 26364793 | 0 | 137 | yes | Yes |
| 182 | cg06608359 | NA | NA | 59.9925 | 3.0263 | 0 | ++ | 97.1 | 34.908 | 1 | 0 | 6 | 28831300 | 0 | 3290 | yes | Yes |
| 678 | cg25643361 | NA | NA | -44.1782 | 2.3263 | 0 | – | 98.1 | 53.559 | 1 | 0 | 6 | 26361389 | 0 | 41 | yes | Yes |
| 721 | cg27543291 | NA | NA | -55.5490 | 2.9654 | 0 | – | 99.3 | 148.203 | 1 | 0 | 6 | 26367644 | 0 | 10 | yes | Yes |
| 282 | cg10046620 | NA | NA | -41.8334 | 2.2423 | 0 | – | 98.5 | 66.796 | 1 | 0 | 6 | 27775042 | 0 | 14 | yes | Yes |
| 233 | cg08168669 | NA | NA | -38.2512 | 2.0864 | 0 | – | 99.4 | 172.357 | 1 | 0 | 6 | 26367580 | 0 | 74 | yes | Yes |
top10.max.p=max(top.10.sig$p.adj[1:10])
# 3. significant CpGs outside of the MHC region
top.10.sig.noMHC = ewas.pTwgsig %>%
filter(.,p.adj<0.05) %>%
filter(.,is.MHC=='No') %>%
.[order(.$P.value,decreasing = F),]
summary(top.10.sig.noMHC$p.adj)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000e+00 6.480e-06 1.515e-03 9.472e-03 7.844e-03 4.836e-02
# 4. Top 10 non-MHC CpGs
top.10.sig.noMHC[1:10,] %>%
kbl() %>%
kable_material(c("striped", "hover"))
| CpG | Allele1 | Allele2 | Effect | StdErr | P.value | Direction | HetISq | HetChiSq | HetDf | HetPVal | CHR | MAPINFO | p.adj | distance_to_nearest_gwashit | EWAS.sig | is.MHC | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 7 | cg05590274 | NA | NA | 16.9936 | 1.9302 | 0 | ++ | 94.4 | 17.997 | 1 | 0.0000221 | 11 | 113262625 | 0.0e+00 | 15822 | yes | No |
| 26 | cg17841099 | NA | NA | -12.1025 | 1.5366 | 0 | – | 97.9 | 48.399 | 1 | 0.0000000 | 1 | 153940090 | 0.0e+00 | 21962506 | yes | No |
| 35 | cg26200795 | NA | NA | 18.7156 | 2.4494 | 0 | ++ | 91.3 | 11.449 | 1 | 0.0007152 | 18 | 52895482 | 0.0e+00 | 5322 | yes | No |
| 8 | cg06276712 | NA | NA | -12.4238 | 1.6339 | 0 | – | 96.6 | 29.631 | 1 | 0.0000001 | 7 | 12107011 | 0.0e+00 | 140319 | yes | No |
| 27 | cg17862947 | NA | NA | 8.2523 | 1.1073 | 0 | ++ | 99.6 | 285.470 | 1 | 0.0000000 | 12 | 133086926 | 1.0e-07 | 11712199 | yes | No |
| 6 | cg05328885 | NA | NA | 30.1986 | 4.1802 | 0 | ++ | 95.9 | 24.148 | 1 | 0.0000009 | 11 | 30943623 | 4.0e-07 | 1541 | yes | No |
| 5 | cg04317648 | NA | NA | 11.9459 | 1.6968 | 0 | ++ | 86.6 | 7.456 | 1 | 0.0063220 | 1 | 8485376 | 1.5e-06 | 553 | yes | No |
| 4 | cg02403154 | NA | NA | -14.5513 | 2.0861 | 0 | – | 0.0 | 0.386 | 1 | 0.5345000 | 18 | 52989223 | 2.3e-06 | 17013 | yes | No |
| 22 | cg14159747 | NA | NA | -17.4837 | 2.5178 | 0 | – | 93.7 | 15.880 | 1 | 0.0000675 | 11 | 113255604 | 2.9e-06 | 22843 | yes | No |
| 15 | cg10154826 | NA | NA | 36.6969 | 5.3737 | 0 | ++ | 96.1 | 25.769 | 1 | 0.0000004 | 6 | 17600994 | 6.5e-06 | 572321 | yes | No |
top.10.sig.noMHC.sCHR=top.10.sig.noMHC[1:10,] %>%
.[order(.$CHR),]
top10.noMHC.max.p=max(top.10.sig.noMHC$p.adj[1:10])
DNA methylation: Processed by Riccardo. 450k array. M-values.
Genetic: HRCv1.1 imputed data
LD reference: 1000G CEU sample, phase 3, hg19 genome build
Smoking status, age, sex, 20 methylation PCs, and cell proportions.
From Mark, wiki.
Local copy (adapted to LBC data): /exports/igmm/eddie/GenScotDepression/shen/Tools/ewas_pipeline/
## Warning: Removed 30820 rows containing missing values (geom_point).
## Warning: Removed 30820 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
Correlation between betas in GS and LBC for all CpGs: r = 0.784
Correlation between betas in GS and LBC for CpGs not in MHC region: r = 0.771
Betas remained in the same direction in LBC: 86%
CpGs nominally significant in LBC: 33.7%
Figures saved as: MR_meth_MDD/Figs/LBC_EWAS_pplot*
## png
## 2
##### Calculate SNP-CpG distance
# Define function
cal_distance <- function(tmp.ids,ref.anno.snp,ref.anno.cpg){
snp.id = as.character(tmp.ids['SNP'])
cpg.id = as.character(tmp.ids['CpG'])
chr.snp = ref.anno.snp$CHR[ref.anno.snp$RSID==snp.id]
chr.cpg = ref.anno.cpg$CHR[ref.anno.cpg$ID==cpg.id]
bp.snp = ref.anno.snp$BP[ref.anno.snp$RSID==snp.id]
bp.cpg = ref.anno.cpg$MAPINFO[ref.anno.cpg$ID==cpg.id]
if(chr.snp!=chr.cpg){tmp.distance=-999}else{
tmp.distance = abs(bp.snp-bp.cpg)
}
return(tmp.distance)
}
# Reload data
load(here::here('MR_meth_MDD/result/EWAS_MDDprs_fromKathryn/cpg_snp_mapping_pNbeta.RData'))
ls.CpG = ls.CpG %>%
mutate(.,CHR=gsub('chr','',CHR)) %>%
mutate(.,CHR=as.numeric(CHR)) %>%
.[order(.$CHR,decreasing = F),] %>%
mutate(ID=as.character(ID))
colnames(ls.SNP.102)=c('CHR','BP','RSID')
ls.SNP.102 = ls.SNP.102 %>%
mutate(.,CHR=gsub('chr','',CHR)) %>%
mutate(.,CHR=as.numeric(CHR)) %>%
.[order(.$CHR,decreasing = F),] %>%
mutate(RSID=as.character(RSID)) %>%
mutate(is.MHC = if_else(BP>25000000&BP<35000000&CHR==6,'Yes','No'))
p.mat = p.mat %>% column_to_rownames(var="CpG")
colnames(p.mat)=strsplit(colnames(p.mat),split = '_') %>% unlist %>% .[grep('^rs',.)]
p.mat[p.mat==0]=1e-320 # Recover extremely low p-values to system threshold
p.mat[p.mat>0.05/102]=NA
p.mat.new = -log10(p.mat) %>%
.[ls.CpG$ID,] %>%
.[,ls.SNP.102$RSID]
# Transform matrix to long-format data
ids.input = expand.grid(dimnames(p.mat),stringsAsFactors = F)
colnames(ids.input) = c('CpG','SNP')
# Calc distance
distance.long = pbapply(ids.input,FUN=cal_distance,MARGIN = 1,
ref.anno.snp=ls.SNP.102,ref.anno.cpg=ls.CpG)
distance.long = data.frame(ids.input,distance.long)
# Transform results back to matrix
distance.mat = matrix(distance.long[,3],ncol=102,byrow = F)
colnames(distance.mat)=colnames(p.mat)
rownames(distance.mat)=rownames(p.mat)
distance.mat[distance.mat==-999]=NA
distance.mat.clean = distance.mat
distance.mat.clean[is.na(p.mat)]=NA
##### Summarise distance
# N sig CpG
colSums(!is.na(p.mat))[ls.SNP.102$RSID[ls.SNP.102$is.MHC=='Yes']]
## rs13198716 rs2393962 rs2232423 rs2232422 rs1265062 rs28732100 rs537160
## 689 523 689 430 668 444 643
colSums(!is.na(p.mat))[ls.SNP.102$RSID[ls.SNP.102$is.MHC=='No']]
## rs301806 rs1002656 rs11579246 rs2842586 rs10789214 rs2568958 rs2841188
## 4 1 3 0 0 2 2
## rs4571923 rs2389024 rs10913112 rs11578222 rs12134279 rs4589792 rs1568452
## 1 0 0 1 0 0 0
## rs1518812 rs2697368 rs2111592 rs7622843 rs7617480 rs815710 rs6774461
## 0 0 0 0 1 0 1
## rs362331 rs7697558 rs1022079 rs6837598 rs10471540 rs2582045 rs6882046
## 0 0 1 0 0 1 1
## rs13167027 rs161645 rs10866752 rs4464770 rs12530388 rs1396898 rs6933618
## 0 0 0 0 0 0 0
## rs3823624 rs6966915 rs2522831 rs17157433 rs11561993 rs7807677 rs4350019
## 2 3 0 0 0 0 0
## rs7044150 rs10959573 rs10809510 rs3793577 rs1332437 rs13291610 rs3824344
## 1 0 1 0 0 0 0
## rs10973229 rs10759879 rs913930 rs549779 rs11818524 rs17766570 rs1021362
## 0 0 0 0 0 0 0
## rs11031225 rs7942007 rs1783541 rs10501403 rs7932640 rs1554929 rs6589377
## 1 1 0 1 0 2 2
## rs2187490 rs7978008 rs3883901 rs2056443 rs1343607 rs9592461 rs9531043
## 0 0 0 0 0 0 0
## rs4772087 rs8013655 rs1952039 rs7229 rs1045430 rs942866 rs657586
## 0 0 0 0 1 2 0
## rs8040191 rs716508 rs11077203 rs8061005 rs11642229 rs3091404 rs11643192
## 0 0 0 0 0 0 0
## rs1539853 rs7232543 rs1833288 rs4598994 rs1261086 rs7231748 rs784254
## 0 1 2 2 2 2 2
## rs7236339 rs2072971 rs9074 rs5995992
## 7 0 2 4
# Distance
distance.summary.trans =
colSums(distance.mat.clean>1000000,na.rm =T)[ls.SNP.102$RSID[ls.SNP.102$is.MHC=='No']]/
colSums(!is.na(distance.mat.clean))[ls.SNP.102$RSID[ls.SNP.102$is.MHC=='No']]
distance.summary.trans = distance.summary.trans[!is.na(distance.mat.clean)>0]
Exposure: mQTL data from GoDMC (N = 20,396). Cohorts that overlap with PGC29 were removed.
Outcome: Cortical thickness and cortical surface area (UKB, N~=40k)
In the EWAS of MDD PRS (pT = 5e-8), a total of 725 CpGs were significant after Bonferroni correction.
615 had more than 30 genetic instruments available.
21 CpGs left after ‘clumping’ (window = 3Mb, r=0.1) in the MHC region.
Finally, 20 CpGs with >=5 genetic instruments after data harmonisation entered MR.
Analyses: IVW, Weighted-median, MR-Egger
Additional test: MR Steiger directionary test.
Package: TwoSampleMR
25 CpGs tested:
figs.MR$GMsa
figs.MR$GMthk
figs.MR$GMvol
figs.MR$WMgFA
figs.MR$WMgMD
figs.MR$WMgFA_TR
figs.MR$WMgMD_TR
figs.MR$WMFA_ATR
figs.MR$WMMD_ATR
reorg_fornemat <- function(tmp.res,targetmethod,tmp.trait,pval_col){
new.res = tmp.res[[tmp.trait]] %>%
mutate(.,outcome=tmp.trait,p.adj=p.adjust(get(pval_col),method='bonferroni')) %>%
filter(.,method==targetmethod)
}
ivw.res = as.list(names(res)) %>%
lapply(.,reorg_fornemat,tmp.res=res,
targetmethod='Inverse variance weighted',pval_col='pval') %>%
bind_rows %>%
mutate(log.p.val=-log10(pval))
egger.res = as.list(names(res)) %>%
lapply(.,reorg_fornemat,tmp.res=res,
targetmethod='MR Egger',pval_col='pval') %>%
bind_rows %>%
mutate(log.p.val=-log10(pval)) %>%
mutate(p.adj=pval)
wm.res = as.list(names(res)) %>%
lapply(.,reorg_fornemat,tmp.res=res,
targetmethod='Weighted median',pval_col='pval') %>%
bind_rows %>%
mutate(log.p.val=-log10(pval))
ivw.res$outcome = recode(ivw.res$outcome,
GMsa = 'Surface area',
GMthk = 'Cortical thickness',
GMvol = 'Cortical volume',
WMgMD = 'global MD',
WMgFA_TR = 'gFA in\nthalamic radiation',
WMgMD_TR = 'gFA in\nthalamic radiation',
WMFA_ATR = 'FA in anterior\nthalamic radiation',
WMMD_ATR = 'MD in anterior\nthalamic radiation')
egger.res$outcome = recode(egger.res$outcome,
GMsa = 'Surface area',
GMthk = 'Cortical thickness',
GMvol = 'Cortical volume',
WMgMD = 'global MD',
WMgFA_TR = 'gFA in\nthalamic radiation',
WMgMD_TR = 'gFA in\nthalamic radiation',
WMFA_ATR = 'FA in anterior\nthalamic radiation',
WMMD_ATR = 'MD in anterior\nthalamic radiation')
wm.res$outcome = recode(wm.res$outcome,
GMsa = 'Surface area',
GMthk = 'Cortical thickness',
GMvol = 'Cortical volume',
WMgMD = 'global MD',
WMgFA_TR = 'gFA in\nthalamic radiation',
WMgMD_TR = 'gFA in\nthalamic radiation',
WMFA_ATR = 'FA in anterior\nthalamic radiation',
WMMD_ATR = 'MD in anterior\nthalamic radiation')
# Prep data for graph
fig.1=circular_graph_mr(target.res = ivw.res)
fig.2=circular_graph_mr(target.res = egger.res)
fig.3=circular_graph_mr(target.res = wm.res)
fig.total = ggarrange(fig.1,fig.2,fig.3,ncol=3,
labels = c('IVW','MR Egger','Weighted-median'),common.legend = T)
ggsave(fig.total,device = 'png',height=15,width = 40,units = 'cm',dpi=300,
filename = here::here('MR_meth_MDD/Figs/MR_DNAm_to_brain/MR_res.png'))
fig.total
Exposure: mQTL data from GoDMC (N = 20,396). Cohorts that overlap with PGC29 were removed.
Outcome: MDD meta-GWAS (PGC + UKBnoGS + 23andMe, N~=800k)
In the EWAS of MDD PRS (pT = 5e-8), a total of 725 CpGs were significant after Bonferroni correction.
615 had more than 30 genetic instruments available.
21 CpGs left after ‘clumping’ (window = 250kb, r=0.1)
Finally, 25 CpGs with >=5 genetic instruments after data harmonisation entered MR.
Analyses: IVW, Weighted-median, MR-Egger
Additional test: MR Steiger directionary test.
Package: TwoSampleMR
25 CpGs tested:
dat.fig = res %>%
mutate(ord=1:nrow(.))
fig=
ggplot(dat.fig, aes(x=reorder(exposure,ord), y=-log10(pval), color=method)) +
geom_point(position=position_dodge(width = 0.1), stat="identity", size=2) +
theme(
panel.background = element_blank(),
panel.grid = element_blank(),
axis.line.x = element_line(size=0.5),
axis.text=element_text(size=10), axis.title=element_text(size=11),
plot.title = element_text(lineheight=1, face="bold", vjust=1, hjust=0.5,size=9),
strip.text = element_text(size=8),
plot.margin=unit(c(1,1,1,3),'mm')) +
scale_x_discrete(position='top')+
geom_hline(yintercept=0,color = "black", size=0.3)+
geom_hline(yintercept=-log10(0.05),color = "red",linetype='dashed', size=0.3)+
coord_flip()+
ylab('P-value for MR')+
xlab('CpG')
fig
dat.fig = res %>%
filter(method=='MR Egger') %>%
mutate(ord=1:nrow(.))
fig=
ggplot(dat.fig, aes(x=reorder(exposure,ord), y=-log10(pval.1))) +
geom_point(color='tomato',position=position_dodge(width = 0.1), stat="identity", size=2) +
theme(
panel.background = element_blank(),
panel.grid = element_blank(),
axis.line.x = element_line(size=0.5),
axis.text=element_text(size=10), axis.title=element_text(size=11),
plot.title = element_text(lineheight=1, face="bold", vjust=1, hjust=0.5,size=9),
strip.text = element_text(size=8),
plot.margin=unit(c(1,1,1,3),'mm')) +
scale_x_discrete(position='top')+
geom_hline(yintercept=0,color = "black", size=0.3)+
geom_hline(yintercept=-log10(0.05),color = "red",linetype='dashed', size=0.3)+
coord_flip()+
ggtitle('MR Egger intercept')+
ylab('P-value for MR Egger intercept')+
xlab('CpG')
fig
dat.fig = res %>%
filter(method=='Inverse variance weighted') %>%
mutate(ord=1:nrow(.))
fig=
ggplot(dat.fig, aes(x=reorder(exposure,ord), y=-log10(Q_pval))) +
geom_point(color='tomato',position=position_dodge(width = 0.1), stat="identity", size=2) +
theme(
panel.background = element_blank(),
panel.grid = element_blank(),
axis.line.x = element_line(size=0.5),
axis.text=element_text(size=10), axis.title=element_text(size=11),
plot.title = element_text(lineheight=1, face="bold", vjust=1, hjust=0.5,size=9),
strip.text = element_text(size=8),
plot.margin=unit(c(1,1,1,3),'mm')) +
scale_x_discrete(position='top')+
geom_hline(yintercept=0,color = "black", size=0.3)+
geom_hline(yintercept=-log10(0.05),color = "red",linetype='dashed', size=0.3)+
coord_flip()+
ggtitle('Q test (heterogeneity)')+
ylab('P-value for Q statistics')+
xlab('CpG')
fig
dat.fig = res
fig=
ggplot(dat.fig, aes(x=-log10(pval.1), y=-log10(pval))) +
geom_point(position=position_dodge(width = 0.1), stat="identity", size=2) +
theme(
axis.line.x = element_line(size=0.5),
axis.text=element_text(size=10), axis.title=element_text(size=11),
plot.title = element_text(lineheight=1, face="bold", vjust=1, hjust=0.5,size=9),
strip.text = element_text(size=8),
plot.margin=unit(c(1,1,1,3),'mm')) +
geom_smooth(method=lm , color="red", se=TRUE)+
facet_wrap(~method,dir='h', scales = "free_y") +
ylab('P-value for MR')+
xlab('P-value for Egger intercept')
fig
dat.fig = res
fig=
ggplot(dat.fig, aes(x=-log10(Q_pval), y=-log10(pval))) +
geom_point(position=position_dodge(width = 0.1), stat="identity", size=2) +
theme(
axis.line.x = element_line(size=0.5),
axis.text=element_text(size=10), axis.title=element_text(size=11),
plot.title = element_text(lineheight=1, face="bold", vjust=1, hjust=0.5,size=9),
strip.text = element_text(size=8),
plot.margin=unit(c(1,1,1,3),'mm')) +
geom_smooth(method=lm , color="red", se=TRUE)+
facet_wrap(~method,dir='h', scales = "free_y") +
ylab('P-value for MR')+
xlab('P-value for Q test')
fig
dat.fig = res
fig=
ggplot(dat.fig, aes(x=snp_r2.exposure, y=-log10(pval))) +
geom_point(position=position_dodge(width = 0.1), stat="identity", size=2) +
theme(
axis.line.x = element_line(size=0.5),
axis.text=element_text(size=10), axis.title=element_text(size=11),
plot.title = element_text(lineheight=1, face="bold", vjust=1, hjust=0.5,size=9),
strip.text = element_text(size=8),
plot.margin=unit(c(1,1,1,3),'mm')) +
geom_smooth(method=lm , color="red", se=TRUE)+
facet_wrap(~method,dir='h', scales = "free_y") +
ylab('P-value for MR')+
xlab('SNP R2 for exposure')
fig
## Warning: position_dodge requires non-overlapping x intervals
## Warning: position_dodge requires non-overlapping x intervals
## Warning: position_dodge requires non-overlapping x intervals
All CpGs were significant for >= 1 MR method
20 significant for >=2 MR methods
9 significant for all three MR methods + 6 CpGs insig for IVW and Weight median and MR-Egger intercept insignificant = 15 valid causal effects from CpG to MDD
Positions of these CpGs
# Find CpGs sig for all three methods
# ls.cpg.3mr = res %>%
# group_by(exposure) %>%
# count(pval<0.05) %>%
# filter(`pval < 0.05`==T, n == 3)
# # CpGs sig for IVW and WM
# ls.cpg.2mr.ivw_wm = res %>%
# filter(method!='MR Egger') %>%
# group_by(exposure) %>%
# count(pval<0.05) %>%
# filter(`pval < 0.05`==T, n == 2)
# # CpGs insig for MR Egger intercept
# ls.cpg.mregger_intercept_null = res %>%
# filter(method=='MR Egger') %>%
# filter(pval.1>0.05)
# # CpGs sig for IVW and WM and insig for MR Egger intercept
# ls.cpg.2mr.valid = ls.cpg.2mr.ivw_wm %>%
# filter(exposure %in% ls.cpg.mregger_intercept_null$exposure)
# # Final list: 3 methods + 2 methods (ivw+wm+Egger intercept null)
# ls.cpg.valid = c(as.character(ls.cpg.3mr$exposure),
# as.character(ls.cpg.2mr.valid$exposure)) %>% unique
#
# m_plot(dat=fig.dat.pT.5e_08,chr="CHR", bp="MAPINFO", snp="CpG", p="P.value",
# add_category_name=F,fig_size=c(26,13),tophits_annot=F,y_lim=c(0,120),
# man_annot=ls.cpg.valid)